clear
set more off
capture log close




/* SEQUENTIAL MODEL */
local model_descr "empu"



* CHOOSE subsample  

  *income years for children (not survey years)
  loc yrmin=${yrmin} // earliest possible=1985	
  loc yrmax=${yrmax} // latest possible=2018
 
	*STUBNAME for saving log file and output matrix/dataset 
	local modelnum "5b"
	global stubfilenm "model_`modelnum'_dau_empu_`yrmin'_`yrmax'"
  
	*LOG file 
	log using ${us_results}/${stubfilenm}.log, replace
  
  *child ages to observe their adult incomes
  loc agemin=25 // youngest possible=25
  loc agemax=48 // oldest possible=48
  
  *child birth cohorts to use
  loc cohortmin=1952 // earliest possible=1952
  loc cohortmax=1993 // latest possible=1993



*USE the ranked MAIN SAMPLE 
use *newid *LAB* AGE *cohort *LABYR* *MF* *empl* *schmax year female if year>=1985 using ${projdata}/analysis-sample-main.dta, clear

gen AGEC1=year-cohort-40 
gen f_LABAGE_1=f_LABYR-f_cohort 
gen m_LABAGE_1=m_LABYR-m_cohort 

forval i=2/4{
	gen AGEC`i'=AGEC1^`i'
	gen f_LABAGE_`i'=f_LABAGE_1^`i'
	gen m_LABAGE_`i'=m_LABAGE_1^`i'
}


* SAMPLE selection

* Use sample with mothers AND fathers
 keep if pm_LAB!=. & m_LABAGE_1!=. & pf_LAB!=. & f_LABAGE_1!=.
 keep if m_emplavg!=. & f_emplavg!=.
 keep if m_schmax!=. & f_schmax!=.
 keep if female==1 & employ!=.
 
* Get part of daughters' income rank NOT explained by employment 
 qui regress pLAB employ m_LABAGE_? f_LABAGE_?   AGEC? i.year 
   predict pLAB_r, resid 
 
* Residualize all measures using mother's AND father's age (quartic)
 foreach var in schmax pm_LAB m_emplavg m_schmax pf_LAB f_emplavg f_schmax  {  // pLAB employ  
	qui regress `var' m_LABAGE_? f_LABAGE_?   AGEC? i.year
	predict `var'_r, resid
 } 
 
* Subsample 
 keep if inrange(year,`yrmin',`yrmax') & inrange(AGE,`agemin',`agemax')
 tab year, su(AGE)
 
 




* ESTIMATE MODEL 

scalar drop _all

*Sample sizes
qui reg pLAB_r pf_LAB_r, cluster(newid)
scalar N=e(N)
scalar Nkids=e(N_clust)
qui reg pLAB_r pf_LAB_r pm_LAB_r, cluster(f_newid)
scalar Nfathers=e(N_clust)
qui reg pLAB_r pf_LAB_r pm_LAB_r, cluster(m_newid)
scalar Nmothers=e(N_clust)

scalar list Nkids Nfathers Nmothers

* "IGE"
 reg pLAB_r pm_LAB_r 
 scalar IGE=_b[pm_LAB_r]


*** PARENT
* Assortative mating (focusing on mothers so regress fathers on mothers)
 reg f_schmax_r m_schmax_r 
	predict eta_sp, resid 
	scalar d1=_b[m_schmax_r]

* Parent employment: Ep = phi1*Hp + phi2*Hsp + eta_ep
 reg m_emplavg_r m_schmax_r f_schmax_r 
 	predict eta_ep, resid
	scalar phi1=_b[m_schmax_r] 
	scalar phi2=_b[f_schmax_r]
	
 reg m_emplavg_r f_schmax_r // look at unconditional employment response to spouse HC
	
* Parent income: Yp = O_0p*Ep + O_1p*Hp + eta_p
 reg pm_LAB_r m_emplavg_r m_schmax_r  	

* Parent Income: Yp = [O_0p(phi1+phi2*d1)+O_1p]*Hp + O_0p*phi2*eta_sp + O_0p*eta_ep + eta_p	
	gen eta_spep=(phi2*eta_sp)+eta_ep 
	reg pm_LAB_r m_schmax_r eta_spep
	predict eta_p, resid  
		
	scalar O_p=_b[m_schmax_r] 						//  get coefficient = estimate of (theta_0p + theta_1p)
	scalar O_0p=_b[eta_spep] 						//  get coefficient = estimate of theta_0p 
	scalar O_1p=_b[m_schmax_r]-(((d1*phi2)+phi1)*_b[eta_spep]) 	// calc coefficient = estimate of theta_1p 
	
	scalar list 
			
	
* Check predicted parent income rank.
   gen pm_LAB_pred=O_p*m_schmax_r + O_0p*phi2*eta_sp + O_0p*eta_ep + eta_p  		
   sum pm_LAB_r pm_LAB_pred  
   corr pm_LAB_r pm_LAB_pred 
   corr pm_LAB_r pm_LAB_pred, cov 
   reg pLAB_r pm_LAB_r
   reg pLAB_r pm_LAB_pred
        
  
  
*** SPOUSE  
* Spouse assortative mating?? (to get AM residuals, eta_ss)
 reg m_schmax_r f_schmax_r
	predict eta_ss, resid 	
	scalar d1s=_b[f_schmax_r]

	reg f_schmax_r m_emplavg_r  // look at unconditional employment response to spouse HC

* Spouse Employment (to get employment residuals, eta_es)
  reg f_emplavg_r f_schmax_r m_schmax_r 
 	predict eta_es, resid
	scalar phi1s=_b[f_schmax_r] 
	scalar phi2s=_b[m_schmax_r]
	
* Spouse Income (to get residuals, eta_s; also get O_s HC effect and O_0s)
  gen eta_sses=(phi2s*eta_ss)+eta_es
  reg pf_LAB_r f_schmax_r eta_sses 
	predict eta_s, resid  
	scalar O_s=_b[f_schmax_r]
	scalar O_0s=_b[eta_sses]
	scalar O_1s=_b[f_schmax_r]-(((d1s*phi2s)+phi1s)*_b[eta_sses]) 	// calc coefficient = estimate of theta_1s 
	
	scalar list 
	
*Check spouse predicted income rank.
   gen pf_LAB_pred=O_s*f_schmax_r + O_0s*phi2s*eta_ss + O_0s*eta_es + eta_s 
   sum pf_LAB_r pf_LAB_pred 



*** CHILD 
* Child income
 reg pLAB_r schmax_r 
	predict u_c, resid 
   
* Child human capital: Hc = pi1*Yp + pi2*Hp + psi_c	
 reg schmax_r pm_LAB_r m_schmax_r 
	predict psi_c, resid // residual child HC 

	
* Child income - Get pi1, pi2, pi1s, pi2s (restrict pi1, pi1s)
 generate eta_py= (O_0p*phi2*eta_sp)+(O_0p*eta_ep)+eta_p
 generate eta_spy=(O_0s*phi2s*eta_ss)+(O_0s*eta_es)+eta_s
regress pLAB_r m_schmax_r f_schmax_r eta_py eta_spy  
	scalar pi1= _b[eta_py] 
	scalar pi2= _b[m_schmax_r]-(O_p*_b[eta_py]) 
	scalar pi1s=_b[eta_spy] 
	scalar pi2s=_b[f_schmax_r]-(O_s*_b[eta_spy]) 
	scalar list pi1 pi2 pi1s pi2s 
	
*Check child predicted income rank.
gen pLAB_pred=(pi2*m_schmax_r)+(pi2s*f_schmax_r)+(pi1*pm_LAB_r)+(pi1s*pf_LAB_r) 
 su pLAB_r pLAB_pred 
 reg pLAB_pred pm_LAB_pred 
 reg pLAB_r pm_LAB_r 
 
	
* DECOMPOSITION 

	* Get variances of parent income elements
	 su m_schmax_r
	 scalar vHp=r(Var)
	 su eta_sp
	 scalar vEta_sp=r(Var)
	 su eta_ep 
	 scalar vEta_ep=r(Var)
	 su eta_p 
	 scalar vEta_p=r(Var)
	 
	 su eta_py
	 scalar vEta_py=r(Var)
	 scalar list vEta_py
	 di ((O_0p^2)*(phi2^2)*vEta_sp)+((O_0p^2)*vEta_ep)+(vEta_p)
	 
	 su pm_LAB_r
	 di r(Var)
	 scalar vYp=(O_p^2)*vHp + (O_0p^2)*(phi2^2)*vEta_sp + (O_0p^2)*vEta_ep +vEta_p
	 di vYp
	 
	
	* Get variances of parent's spouse's income elements
	 su f_schmax_r
	 scalar vHsp=r(Var)
	 su eta_ss
	 scalar vEta_ss=r(Var)
	 su eta_es 
	 scalar vEta_es=r(Var)
	 su eta_s 
	 scalar vEta_s=r(Var)
	 
	 su eta_spy
	 scalar vEta_spy=r(Var)
	 scalar list vEta_spy
	 
	* Get covariances for parent/spouse components
	corr m_schmax_r eta_spy, cov 
	scalar CovHpEspy=r(cov_12)
	
	corr f_schmax_r eta_py, cov
	scalar CovHspEpy=r(cov_12)
	
	corr eta_sp eta_spy, cov 
	scalar CovEspEspy=r(cov_12)
	
	corr eta_ep eta_spy, cov 
	scalar CovEepEspy=r(cov_12)
	
	corr eta_p eta_spy, cov 
	scalar CovEpEspy=r(cov_12)
	
	corr eta_py eta_spy, cov 
	scalar CovEpyEspy=r(cov_12)


	
*IGE calc:  
scalar B=((pi1*O_p)+pi2)*O_p*(vHp/vYp) + ((pi1s*O_s)+pi2s)*O_p*d1*(vHp/vYp) + pi1*(vEta_py/vYp) + (O_p*pi1s*CovHpEspy/vYp) + (pi1s*CovEpyEspy/vYp) + ((pi1s*O_s)+pi2s)*(CovHspEpy/vYp)
scalar list B IGE


 

*SAVE OUTPUT
clear 

local IGAs		"IGE B"
local pis 		"pi1 pi2 pi1s pi2s"
local varcovs 	"vYp vHp vEta_ep vEta_p vEta_sp vEta_py vHsp vEta_ss vEta_es vEta_s vEta_spy CovHpEspy CovHspEpy CovEpyEspy CovEspEspy CovEepEspy CovEpEspy"
local param		"O_0p O_1p O_p phi1 phi2 phi1s phi2s d1 d1s O_0s O_1s O_s" 						
local Ns		"N Nkids Nfathers Nmothers"

local rownamelist ""
matrix $stubfilenm=(0)
foreach xx in `IGAs' `pis' `varcovs' `param' `Ns' {
	sca y=`xx'
	matrix $stubfilenm=$stubfilenm,y
	local varnamelist "`varnamelist' `xx'"
}
matrix ${stubfilenm}=${stubfilenm}[....,2....]
matrix colnames ${stubfilenm}=`varnamelist'

svmat2 ${stubfilenm}, names(col)
gen model_descr="`model_descr'"
gen modelnum="`modelnum'"
gen yearmin=`yrmin'
gen yearmax=`yrmax'

save ${us_results}/${stubfilenm}.dta, replace


clear
log close 
